import math
import os
import sys

# Filter a raw URL black-list file into a clean list of URLs.
#
# Each input line is split on ":"; only lines that yield exactly four
# fields are considered.  The last two fields are re-joined with ":" to
# rebuild the URL (the scheme separator was consumed by the split).  URLs
# beginning with http:// or https:// are written to the output file, one
# per line; any other rebuilt value is echoed to stdout for inspection.
#
# NOTE(review): a URL that itself contains an extra ":" (for example an
# explicit port) produces more than four fields and is silently skipped.
# Confirm whether that is intended for this data set.
rawBlackListPath = "/data/weijiang/BingDataSetDownloading/urls-related/url-black-list-20120605-raw"
cleanBlackListPath = "/data/weijiang/BingDataSetDownloading/urls-related/url-black-list-20120605"

urlCounter = 0
# Context managers guarantee both files are closed (and the output is
# flushed) even if an exception interrupts the loop.  The input is opened
# first so a missing input file does not truncate an existing output file.
with open(rawBlackListPath, "r") as inputFileHandler:
    with open(cleanBlackListPath, "w") as outputFileHandler:
        for currentLine in inputFileHandler:
            # Split once per line instead of re-splitting for every field.
            fields = currentLine.strip().split(":")
            if len(fields) != 4:
                continue

            currentURL = fields[2] + ":" + fields[3]
            if currentURL.startswith(("http://", "https://")):
                outputFileHandler.write(currentURL + "\n")
                urlCounter += 1
            else:
                # Report rebuilt values that do not look like web URLs.
                print(currentURL)

# Single-argument print() emits identical text on Python 2 and Python 3.
print("urlCounter: %d" % urlCounter)